/* OPTIONS OBS=5000  NOREPLACE ; */  

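/* 
   This program imports the CART imputation .csv files (one per industry group) for all
   industries in the 2002 Census of Manufactures, merges the plant identifiers onto them,
   and appends the plants that are not present in the CART imputations.
*/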


%include "ASMimplibs.sas";



%macro import(data=);

            data WORK.&data                           ;
            %let _EFIERR_ = 0; /* set the ERROR detection macro variable */
            infile "&data..csv" delimiter = ',' MISSOVER DSD lrecl=32767 firstobs=2 ;
               informat number $6. ;
               informat NAICS_NEW_6 $6. ;
               informat TAE best32. ;
               informat CM best32.;
               informat TE best32.;
               informat PH best32. ;
               informat SW best32. ;
               informat TVS best32. ;
               informat dinv best32. ;
               informat energycmratio best32.;
               informat wwswratio best32. ;
               informat impsetnum best32. ;

               format number $6. ;
               format NAICS_NEW_6 $6. ;
               format TAE best12. ;
               format CM best12. ;
               format TE best12. ;
               format PH best12. ;
               format SW best12. ;
               format TVS best12. ;
               format dinv best12. ;
               format energycmratio best12. ;
               format wwswratio best12. ;
               format impsetnum best12. ;
            input
                        number $
			NAICS_NEW_6 $
                        TAE
                        CM
                        TE
                        PH
			SW
                        TVS
                        dinv
                        energycmratio
                        wwswratio
                        impsetnum
            ;
            if _ERROR_ then call symputx('_EFIERR_',1);  /* set ERROR detection macro variable */
            run;

%mend;


%import(data=imputes_all_inds02_311);
%import(data=imputes_all_inds02_3113_3119);
%import(data=imputes_all_inds02_312_315);
%import(data=imputes_all_inds02_316_321);
%import(data=imputes_all_inds02_322_323);
%import(data=imputes_all_inds02_324_327);
%import(data=imputes_all_inds02_331_332);
%import(data=imputes_all_inds02_333_339);
%import(data=imputes_all_inds02_333913_);

data imputes_all_inds02;
 set 	imputes_all_inds02_311
     	imputes_all_inds02_3113_3119
     	imputes_all_inds02_312_315
     	imputes_all_inds02_316_321
	imputes_all_inds02_322_323
	imputes_all_inds02_324_327
	imputes_all_inds02_331_332
	imputes_all_inds02_333_339
	imputes_all_inds02_333913_;
run;
 
   data imputes_all_inds02 (keep = NUMBER_NUM NAICS_NEW_6 TVS dinv TE PH SW TAE CM energycmratio wwswratio impsetnum); 
   set imputes_all_inds02; 
   NUMBER_NUM = input(NUMBER,6.);
  run;


  PROC DATASETS  LIBRARY=WORK;
   MODIFY imputes_all_inds02;
   rename NUMBER_NUM = number;
  RUN;

  PROC SORT DATA=imputes_all_inds02;
   by NUMBER;
  RUN;


  /** Merge ids **/

 data gooddata_ids_all_inds02 (keep = number  survu_id firmid  ); 
  set allcmf.gooddata_ids_all_inds02 ;
 run;
 
 proc sort data=gooddata_ids_all_inds02; by number; run;

 /* NOTE: Some industries may be missing from CART imputes because 
    there were not 2 or more variables with missing data for those industries. 
 */

  data imputes_all_inds02_postmerge in_ids_not_in_imputes02;
   merge gooddata_ids_all_inds02 (in=inids) imputes_all_inds02 (in=inimp); 
  by NUMBER;
  if inids and inimp then output imputes_all_inds02_postmerge;
  if inids and inimp=0 then output in_ids_not_in_imputes02;
  run;

  proc datasets library=work;
   modify imputes_all_inds02_postmerge ;
    rename impsetnum=_IMPUTE_;
  run;

  /*** Check the industries of the plants in the original dataset 
       that aren't in the CART imputations dataset.
       Want to make sure all of these are industries with < 2 variables
       with imputes/missing values.
  ***/

  data gooddata_all_inds02 (keep = survu_id NAICS_NEW_6);
   set allcmf.gooddata_all_inds02;

  proc sort data=in_ids_not_in_imputes02; by survu_id; run;
  proc sort data=gooddata_all_inds02; by survu_id; run;
  
  data in_ids_not_in_imputes02;
   merge in_ids_not_in_imputes02 (in=inid) gooddata_all_inds02 (in=indata);
   by survu_id;
   if inid and indata;
  run;

  proc freq data=in_ids_not_in_imputes02;
    tables NAICS_NEW_6;
   title "industries of plants in ID data but not in CART imputes dataset";
  run;

  /*** For industries/plants in the original dataset 
       that aren't in the CART imputations dataset,
       get the original observations (which might include 
       Census Bureau imputations for no more than 1 variable).
  ***/

  data survuids_not_in_imputes02 (keep = survu_id  );
   set in_ids_not_in_imputes02;
  run;
  
  data cmf02_nonAR (keep = survu_id firmid NAICS_NEW_6 TVS TE PH SW TAE CM dinv energycmratio wwswratio);
   set allcmf.cmf2002_nonAR;
    dinv = tie-tib;
    energycmratio = sum(ee,cf)/cm;
    wwswratio = ww/sw;
  run;

  proc sort data=survuids_not_in_imputes02; by survu_id; run;
  proc sort data=cmf02_nonAR; by survu_id; run;

  data cmf_data_not_in_imputes02 in_surv_not_in_cmf02;
   merge survuids_not_in_imputes02 (in=insurv) cmf02_nonAR (in=incmf);
   by survu_id;
   if insurv and incmf then output cmf_data_not_in_imputes02;
   else if insurv then output in_surv_not_in_cmf02;  * this dataset should be empty;
  run;
 
    
  /*** Create 100 duplicates of these
       observations so that we can include them in the 
       TFPR IQR calculations with the 100 CART implicates.
  ***/ 
  data cmf_data_not_in_imputes02;
   set cmf_data_not_in_imputes02;
    do _IMPUTE_ = 1 to 100;
     output cmf_data_not_in_imputes02;
    end;
  run;

   /** Stack the industries with non-imputed data with the CART-completed dataset,
       add the year variable and save the dataset.
   ***/

    data allcmf.imputes_all_inds02;
     set cmf_data_not_in_imputes02
         imputes_all_inds02_postmerge;
      year=2002;
    run;




